%20210602 v1.01
%20210828 v1.02

function [final_result,expected_mat,expected_mat_split,image]=fpcdh_distribution2(pcdhg_cutoff,barcode_shift,subplotx,subploty,shuffle_times,Labels)
%FPCDH_DISTRIBUTION2 Observed vs. simulated number of expressed rows per cell.
%
%   For every cluster id k = 0..max(cluster id), and once more for all
%   columns pooled together ("total"), the function
%     1. binarises the expression matrix (value > 0 -> expressed),
%     2. simulates shuffle_times independent Bernoulli matrices in which
%        row t is expressed in each cell with probability
%        (number of cells expressing row t) / (number of cells),
%     3. draws the simulated (red stairs) and observed (green bars)
%        histograms of "expressed rows per cell" into
%        subplot(subplotx,subploty,k+1) of the current figure,
%     4. collects variances, variance ratios and p-values in one table row.
%
%   Inputs
%     pcdhg_cutoff  numeric matrix; rows 1..end-1 hold the expression
%                   values (one column per cell), the LAST row holds the
%                   cluster id of each column.
%     barcode_shift matrix whose 3rd column is compared with the cluster id
%                   and whose row count is used as the cell count before
%                   the cutoff.
%     subplotx, subploty   subplot grid dimensions.
%     shuffle_times number of simulated matrices per cluster (>= 1).
%     Labels        indexable by k+1; Labels(k+1) is passed to num2str to
%                   build the subplot title of cluster k.
%
%   Outputs
%     final_result        table with one row per cluster plus the total.
%     expected_mat        1-by-(nClusters+1) cell; each entry is all
%                         simulated matrices of that group concatenated
%                         horizontally, most recent shuffle first.
%     expected_mat_split  (nClusters+1)-by-shuffle_times cell with the
%                         individual simulated matrices.
%     image               handle of the observed-data histogram drawn in
%                         the last ("total") subplot.
%
%   Requires the Statistics and Machine Learning Toolbox
%   (binornd, vartest, fcdf).

if shuffle_times<1
    error('fpcdh_distribution2:shuffleTimes','shuffle_times must be at least 1.');
end

long=size(pcdhg_cutoff,1);
cluster_row=pcdhg_cutoff(long,:);      % cluster id of every column
cluster_num=max(cluster_row);

% One slot per cluster id 0..cluster_num plus one for the pooled "total".
n_groups=cluster_num+2;
result=zeros(n_groups,14);
expected_mat=cell(1,n_groups);
expected_mat_split=cell(n_groups,shuffle_times);

for k=0:cluster_num+1
    is_total=(k==cluster_num+1);
    if is_total
        calculate_pcdhg=pcdhg_cutoff(1:long-1,:);
    else
        calculate_pcdhg=pcdhg_cutoff(1:long-1,cluster_row==k);
    end

    % Binarise: any positive value counts as "expressed". (The previous
    % ceil(x/100000) trick only produced 0/1 while every value was <= 1e5.)
    expressed=double(calculate_pcdhg>0);
    [counts,cells]=size(expressed);
    srow=sum(expressed,2).';           % cells expressing each row
    scol=sum(expressed,1);             % expressed rows in each cell
    possiblesrow=srow./cells;          % per-row expression probability
    varexpected2=sum((1-possiblesrow).*possiblesrow);   % analytic expected variance

    % Simulate shuffle_times Bernoulli matrices. Everything is preallocated
    % per group, so nothing leaks from one cluster to the next.
    y3=zeros(counts,cells);
    y4=zeros(shuffle_times,cells);
    y5=zeros(counts,cells*shuffle_times);
    for v=1:shuffle_times
        for t=1:counts
            y3(t,:)=binornd(1,possiblesrow(t),1,cells);
        end
        y2=sum(y3,1);                  % simulated expressed rows per cell
        y4(v,:)=y2;
        expected_mat_split{k+1,v}=y3;
        % Keep the historical layout of expected_mat: newest shuffle first.
        first_col=(shuffle_times-v)*cells+1;
        y5(:,first_col:first_col+cells-1)=y3;
    end
    expected_mat{k+1}=y5;

    % y4 pools shuffle_times simulations, so it holds shuffle_times times as
    % many samples as the observation; both histograms are therefore drawn
    % as probabilities.
    subplot(subplotx,subploty,(k+1));
    histogram(y4,'BinWidth',1,'DisplayStyle','stairs','LineWidth',2,'EdgeColor','r','Normalization','Probability');
    hold on
    image=histogram(scol,'BinWidth',1,'DisplayStyle','bar','EdgeColor','k','FaceColor','g','Normalization','Probability');
    xlim([0,long]);
    ylim([0,0.5]);
    if is_total
        title('total');
    else
        title(['cluster',num2str(Labels(k+1))]);
    end

    % Variances and tests.
    % NOTE(review): varexpected is taken from the LAST shuffle only (y2), not
    % from all shuffles in y4; the degrees of freedom below match that
    % choice. Confirm this is intended before changing it.
    varexpected=var(y2);
    varobserve=var(scol);
    f1=varobserve/varexpected;
    f2=varobserve/varexpected2;
    [~,p_value3]=vartest(scol,varexpected2);
    p_value1=fcdf(f1,cells-1,cells-1);
    p_value2=fcdf(f2,cells-1,cells-1);

    % Number of cells in this group after (now) and before the cutoff.
    cells_now=cells;
    if is_total
        cells_before=size(barcode_shift,1);
    else
        cells_before=sum(barcode_shift(:,3)==k);
    end
    kept_fraction=cells_now./cells_before;

    result((k+1),:)=[k,varexpected,varexpected2,varobserve,f1,f2,p_value1,p_value2,p_value3,mean(scol),mean(scol(scol~=0)),cells_now,cells_before,kept_fraction];
end
%%
% NOTE: the 8th column holds p_value2; its historical name
% 'p_value1_Ftestpaper_obv' is kept so existing downstream code keeps working.
final_result=array2table(result,'VariableNames',{'ClusterName','varexpected_simulate','varexpected2_paper','varobserve','f1','f2','p_value1_Ftestsimulate_obv','p_value1_Ftestpaper_obv','p_value3_expected2_obv','isoform_num','isoform_num_E0','cell_num_now','cell_num_before','percentage'});
%writetable(final_result,'result fig\401_neuron_UMI1.csv');
%save('analysis302');